import requests
from bs4 import BeautifulSoup
from textblob import TextBlob
import datetime
import pandas as pd
import matplotlib.pyplot as plt

# Global plot settings: default font size for every matplotlib figure
plt.rcParams['font.size'] = 10

# ✅ Function to scrape one date's articles from Dhaka Tribune
def scrape_date(date):
    """Score the sentiment of one day's Dhaka Tribune archive page.

    Downloads the archive page for ``date``, extracts the text of every
    ``<h2>`` and ``<p>`` element, and computes a TextBlob polarity for each
    text longer than 30 characters. Progress is printed as it goes.

    Args:
        date: Object with a ``strftime`` method (e.g. ``datetime.datetime``)
            identifying the archive day to fetch.

    Returns:
        list: The non-zero polarity scores in page order. Empty when the
        request fails or nothing on the page qualifies.
    """
    day = date.strftime('%Y-%m-%d')
    url = f"https://www.dhakatribune.com/archive/{day}"
    print(f"Scraping URL: {url}")

    try:
        page = requests.get(url, timeout=10)
        page.raise_for_status()
    except requests.RequestException as e:
        # Best effort: a failed day contributes no scores instead of aborting.
        print(f"❌ Failed to fetch data for {day}: {e}")
        return []

    document = BeautifulSoup(page.text, 'html.parser')

    scores = []
    # Headlines and paragraphs are the text-bearing tags that get scored.
    for tag in document.find_all(['h2', 'p']):
        text = tag.get_text(strip=True)
        if len(text) <= 30:
            # Too short to be meaningful article text.
            continue

        polarity = TextBlob(text).sentiment.polarity
        if polarity == 0.0:
            # Exactly-neutral results are left out of the output.
            continue

        scores.append(polarity)
        print(f"Sentiment: {polarity:.2f} | Text: {text[:60]}...")

    return scores

# ✅ Ask for start and end date
def _prompt_for_date(prompt):
    """Prompt repeatedly until the user enters a valid YYYY-MM-DD date.

    Args:
        prompt: Text shown to the user by ``input``.

    Returns:
        tuple: ``(text, parsed)`` where ``text`` is the whitespace-stripped
        string the user typed and ``parsed`` is the ``datetime.datetime``
        produced by ``strptime`` with the ``%Y-%m-%d`` format.
    """
    while True:
        # Strip so a stray leading/trailing space does not fail parsing.
        text = input(prompt).strip()
        try:
            return text, datetime.datetime.strptime(text, '%Y-%m-%d')
        except ValueError:
            # Ask again rather than ending the run with a raw traceback.
            print(f"❌ '{text}' is not a valid date. Expected format: YYYY-MM-DD.")


start_date_input, start_date = _prompt_for_date("Please enter the start date (YYYY-MM-DD): ")
end_date_input, end_date = _prompt_for_date("Please enter the end date (YYYY-MM-DD): ")

# An inverted range would scrape nothing and still write an empty
# spreadsheet and plot, so stop with a clear message instead.
if end_date < start_date:
    raise SystemExit(
        f"❌ End date ({end_date_input}) is before start date ({start_date_input})."
    )

# ✅ Walk the range one day at a time (both ends inclusive) and pool the scores
all_sentiments = []
one_day = datetime.timedelta(days=1)
current_date = start_date

while not current_date > end_date:
    day_label = current_date.strftime('%Y-%m-%d')
    print(f"\n📅 Processing: {day_label}")
    # A day that fails to download yields an empty list and adds nothing.
    all_sentiments += scrape_date(current_date)
    current_date = current_date + one_day

# ✅ Save to Excel on your desktop (falls back to the working directory)
output_path = r"C:\Users\lab44\Desktop\New Microsoft Excel Worksheet.xlsx"
df = pd.DataFrame(all_sentiments, columns=["Sentiment Score"])
try:
    df.to_excel(output_path, index=False)
except OSError as e:
    # The preferred path is specific to one machine/user. OSError (including
    # FileNotFoundError and PermissionError) covers a missing folder or a
    # workbook that cannot be overwritten. Fall back to a relative file so
    # the scraped scores are not lost after all the downloading is done.
    print(f"⚠️ Could not write to {output_path}: {e}")
    output_path = "sentiment_scores.xlsx"
    df.to_excel(output_path, index=False)
print(f"\n✅ Sentiment scores saved to: {output_path}")

# ✅ Plot a histogram of all collected sentiment scores
label_size = 8
plt.rcParams['font.size'] = label_size

plt.figure(figsize=(5, 4))
# Polarity lies in [-1, 1]; fix the range so the axis is the same for any run.
plt.hist(
    all_sentiments,
    bins='sturges',
    range=(-1, 1),
    color='magenta',
    edgecolor="black",
)
plt.title(f'Dhaka Tribune ({start_date_input} to {end_date_input})', fontsize=9)

# Axis labels first, then tick labels, all at the same small size.
for set_axis_label, caption in ((plt.xlabel, 'Sentiment Score'), (plt.ylabel, 'Frequency')):
    set_axis_label(caption, fontsize=label_size)
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=label_size)

plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)
plt.tight_layout()
plt.show()
